---
title: "Fibonacci and k-Subsecting Recursive Feature Elimination: Supplement"
output: 
  html_notebook: 
    code_folding: hide
    number_sections: yes
    toc: yes
    toc_depth: 2
---

```{r libraries, message=FALSE, warning=FALSE}
library(knitr)
library(dplyr)
library(tidyr)
library(ggplot2)
library(plotly)
library(DT)
library(ggthemes)
library(PMCMR)
library(scmamp)

knitr::opts_chunk$set(dpi = 96, results = "asis")
# Print inline numbers with a thousands separator.
knitr::knit_hooks$set(inline = function(x) {
  prettyNum(x, big.mark = ",")
})
SAVE_EPS <- FALSE
```

```{r functions, warning=FALSE, message=FALSE}
# Interactive table with column filters and copy/csv/excel/pdf/print export;
# the listed columns are rounded to round_digits.
prettyTable <- function(table_df, round_columns = numeric(), round_digits = 3) {
    DT::datatable(table_df, style = "bootstrap", filter = "top", rownames = FALSE, extensions = "Buttons",
                  options = list(dom = 'Bfrtip', buttons = c('copy', 'csv', 'excel', 'pdf', 'print'))) %>%
    formatRound(round_columns, round_digits)
}

# reportFriedman(): for one classifier and one metric, aggregates the raw
# results, runs a Friedman rank sum test across the eight feature selectors,
# prints mean ranks and paired one-sided Wilcoxon signed rank tests (each
# subsecting selector vs its standard RFE counterpart), and returns a one-row
# data frame with the mean ranks and the Friedman p-value.
reportFriedman <- function(df, classifier, metric, metric_function, metric_direction=1, make_plot = T){
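    # Aggregate per-fold results to one value per (dataset, selector) with
    # metric_function (mean, or sd), then reshape into a datasets x selectors
    # matrix.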
    averages = data.frame(df) %>%
        select(-c(Attributes, Number.of.classes, Min.class.examples, Max.class.examples)) %>%
        group_by(Dataset, Classifier, Feature.selector) %>%
        summarise_all(metric_function, na.rm=T) %>%
        data.frame() %>%
        filter(Feature.selector != 'All', Classifier == classifier) %>%
        select_("Dataset", "Feature.selector", metric) %>%
        spread("Feature.selector", metric) %>%
        select(-Dataset) %>%
        select(`FRFE`, `RFE-log`, `3-SRFE`, `RFE-log-3`, `5-SRFE`, `RFE-log-5`, `10-SRFE`, `RFE-log-10`) %>%
        data.matrix()
    
    if (metric_direction == 1) {
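      # Higher is better (e.g. accuracy): rank the negated values so that
      # rank 1 is best; run the tests on the raw values.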
      averages_r = -averages
      averages_t = averages
    } else {
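      # Lower is better (e.g. processing time): rank the raw values; negate
      # them for the one-sided tests so that "greater" still means "the
      # subsecting variant is better".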
      averages_r = averages
      averages_t = -averages
    }
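    # Rank the selectors within each dataset (row-wise), then order the
    # columns by mean rank.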
    ranks <- t(apply(averages_r, 1, rank))
    ranks <- ranks[,order(colMeans(ranks, na.rm=TRUE))]
    cat("<hr><strong>Friedman rank sum test</strong><br />")
    fTest <- friedman.test(averages_t)
    testResult <- capture.output(print(fTest))
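    # Line 5 of the printed htest object is the statistic line:
    # "Friedman chi-squared = ..., df = ..., p-value = ...".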
    cat(testResult[5])
    cat("\r\n")
    mean_ranks <- t(colMeans(ranks, na.rm=TRUE))
    print(kable(mean_ranks, digits = 2))
    cat("\r\n")
    
    w_df <- data.frame(Subsecting = c("FRFE", "3-SRFE", "5-SRFE", "10-SRFE"),
                       Standard = c("RFE-log", "RFE-log-3", "RFE-log-5", "RFE-log-10"), 
                       p.value = c(-1, -1,-1, -1)) %>%
      mutate(Hypothesis = paste0(Subsecting, " vs ", Standard))
    cat("<hr><strong>Wilcoxon signed rank test</strong><br />")
    for (r in 1:nrow(w_df)){
      wTest <- wilcox.test(averages_t[,as.character(w_df[r, "Subsecting"])],
                           averages_t[,as.character(w_df[r, "Standard"])],
                           paired=TRUE, alternative = "g")
      w_df[r, "p.value"] <- wTest$p.value
    }
    print(kable(w_df %>% select(Hypothesis, p.value), digits = 3))
    cat("\r\n")
    
    
    
    if (make_plot){
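        # Save a critical-difference (CD) plot of the mean ranks to EPS
        # (scmamp::plotCD).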
        setEPS()
        par(mar=c(0,0,0,0))
        postscript(paste0("images/", metric, "_", classifier, "_",  "Friedman.eps"), width = 7, height = 3.4)
        plotCD(results.matrix = averages_t, alpha = 0.05, cex = 1.1)
        dev.off()
    }
    cat("<hr>")
    mean_ranks_df <- data.frame(
      `FRFE` = mean_ranks[1, "FRFE"], `RFE-log` = mean_ranks[1, "RFE-log"],
      `3-SRFE` = mean_ranks[1, "3-SRFE"], `RFE-log-3` = mean_ranks[1, "RFE-log-3"],
      `5-SRFE` = mean_ranks[1, "5-SRFE"], `RFE-log-5` = mean_ranks[1, "RFE-log-5"],
      `10-SRFE` = mean_ranks[1, "10-SRFE"], `RFE-log-10` = mean_ranks[1, "RFE-log-10"],
      `p-value` = fTest$p.value)
    mean_ranks_df
}
```
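
For reference, the sketch below applies the same rank-then-test steps as `reportFriedman()` to a small synthetic score matrix. The values are random and purely illustrative (higher is assumed better, as for accuracy), and the chunk is not evaluated:

```{r friedman toy, eval=FALSE}
# Rows play the role of datasets, columns the role of feature selectors.
set.seed(1)
toy <- matrix(runif(20), nrow = 5,
              dimnames = list(NULL, c("A", "B", "C", "D")))
toy_ranks <- t(apply(-toy, 1, rank))  # higher score -> better (lower) rank
colMeans(toy_ranks)                   # mean rank per selector
friedman.test(toy)                    # rows are blocks, columns are groups
```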

# Benchmark results

## Raw results

df <- read.csv("Benchmarks.csv", na.strings = c("?", "", "-")) %>%
    mutate(Dataset = as.character(Dataset), Classifier = as.character(Classifier), Selected.num = as.character(Selected.num)) %>%
    select(-c(Start.date, Selector.params, Scorer, Grid.scores, Selected.features))
df$Dataset <- substr(df$Dataset, 1, nchar(df$Dataset)-4)
df$Dataset <- as.factor(df$Dataset)
df$Classifier[startsWith(df$Classifier, "SVC")] <- "SVM"
df$Classifier[startsWith(df$Classifier, "Random")] <- "RF"
df$Classifier[startsWith(df$Classifier, "Logistic")] <- "LR"
df$Classifier[startsWith(df$Classifier, "LGBM")] <- "GBM"
df$Classifier <- as.factor(df$Classifier)
df$Selected.num[df$Selected.num == "error"] <- NA
df$Selected.num <- as.numeric(df$Selected.num)
prettyTable(df, c(9, 11:15))

## Datasets

```{r dataset summary}
datasets_df <- df %>%
  select(Dataset, Examples, Attributes, Number.of.classes, Min.class.examples, Max.class.examples) %>%
  distinct()

prettyTable(datasets_df)
```

## Mean cross-validation scores

```{r cv summary, warning=FALSE}
# Average the cross-validation folds for each dataset/classifier/selector triple.
cv_df <- df %>%
  select(-c(Attributes, Number.of.classes, Min.class.examples, Max.class.examples)) %>%
  group_by(Dataset, Classifier, Feature.selector) %>%
  summarise_all(mean, na.rm = TRUE)

prettyTable(cv_df, c(5, 8:12))
```

# Comparisons and statistical tests {.tabset}

```{r comparisons, results="asis", warning=FALSE, message=FALSE, error=FALSE}
classifiers <- as.character(unique(cv_df$Classifier))
metrics <- c("Selected.num", "Selected.num", "Accuracy", "Kappa", "Macro.recall", "G.mean", "Processing.time")
metric_functions <- c(mean, sd, mean, mean, mean, mean, mean)
metric_headers <- c("Number of selected features", "Standard deviation of number of selected features", "Accuracy", "Kappa", "Macro recall", "G-mean", "Processing time")
metric_direction <- c(-1, -1, 1, 1, 1, 1, -1)

friedman_df <- data.frame(Metric = character(), Classifier = character(),
                          `FRFE` = numeric(), `RFE-log` = numeric(),
                          `3-SRFE` = numeric(), `RFE-log-3` = numeric(),
                          `5-SRFE` = numeric(), `RFE-log-5` = numeric(),
                          `10-SRFE` = numeric(), `RFE-log-10` = numeric(),
                          `p-value` = numeric())

for (i in seq_along(metrics)){
  cat(paste0("## ", metric_headers[i], "\r\n\r\n"))
  for (classifier in classifiers){
    cat(paste0("### ", classifier, "\r\n\r\n"))
    mean_ranks <- reportFriedman(df, classifier, metrics[i], metric_functions[i], metric_direction[i], make_plot = FALSE)
    mean_df <- data.frame(Metric = metric_headers[i], Classifier = classifier,
                          `FRFE` = mean_ranks$FRFE, `RFE-log` = mean_ranks$RFE.log,
                          `3-SRFE` = mean_ranks$X3.SRFE, `RFE-log-3` = mean_ranks$RFE.log.3,
                          `5-SRFE` = mean_ranks$X5.SRFE, `RFE-log-5` = mean_ranks$RFE.log.5,
                          `10-SRFE` = mean_ranks$X10.SRFE, `RFE-log-10` = mean_ranks$RFE.log.10,
                          `p-value` = mean_ranks$p.value)
    friedman_df <- rbind(friedman_df, mean_df)
  }
}
```
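
Each "subsecting vs standard" row printed below comes from a paired, one-sided Wilcoxon signed rank test; the sign convention in `reportFriedman()` makes `alternative = "greater"` mean "the subsecting variant is better" for both metric directions. A minimal, non-evaluated sketch on made-up per-dataset scores (the vectors `x` and `y` are illustrative, not benchmark values):

```{r wilcoxon sketch, eval=FALSE}
# Hypothetical per-dataset scores for a subsecting selector (x) and its
# standard RFE counterpart (y), paired by dataset.
x <- c(0.81, 0.77, 0.92, 0.68, 0.73)
y <- c(0.80, 0.74, 0.87, 0.70, 0.66)
wilcox.test(x, y, paired = TRUE, alternative = "greater")
```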

## Number of selected features

### GBM

**Friedman rank sum test:** Friedman chi-squared = 43.604, df = 7, p-value = 2.549e-07

Mean ranks:

| 3-SRFE | 5-SRFE | FRFE | 10-SRFE | RFE-log | RFE-log-3 | RFE-log-5 | RFE-log-10 |
|---:|---:|---:|---:|---:|---:|---:|---:|
| 3.02 | 3.7 | 3.75 | 3.82 | 5.25 | 5.29 | 5.54 | 5.64 |

**Wilcoxon signed rank test:**

| Hypothesis | p-value |
|---|---:|
| FRFE vs RFE-log | 0.003 |
| 3-SRFE vs RFE-log-3 | 0.000 |
| 5-SRFE vs RFE-log-5 | 0.002 |
| 10-SRFE vs RFE-log-10 | 0.003 |

### LR

**Friedman rank sum test:** Friedman chi-squared = 49.037, df = 7, p-value = 2.231e-08

Mean ranks:

| 10-SRFE | 3-SRFE | 5-SRFE | RFE-log-10 | FRFE | RFE-log-3 | RFE-log-5 | RFE-log |
|---:|---:|---:|---:|---:|---:|---:|---:|
| 2.39 | 3.61 | 3.96 | 4.3 | 5.14 | 5.32 | 5.36 | 5.91 |

**Wilcoxon signed rank test:**

| Hypothesis | p-value |
|---|---:|
| FRFE vs RFE-log | 0.048 |
| 3-SRFE vs RFE-log-3 | 0.011 |
| 5-SRFE vs RFE-log-5 | 0.023 |
| 10-SRFE vs RFE-log-10 | 0.002 |

### RF

**Friedman rank sum test:** Friedman chi-squared = 30.123, df = 7, p-value = 9.013e-05

Mean ranks:

| 10-SRFE | 3-SRFE | 5-SRFE | RFE-log-5 | RFE-log | RFE-log-10 | FRFE | RFE-log-3 |
|---:|---:|---:|---:|---:|---:|---:|---:|
| 3.29 | 3.43 | 3.71 | 4.46 | 4.96 | 5.18 | 5.39 | 5.57 |

**Wilcoxon signed rank test:**

| Hypothesis | p-value |
|---|---:|
| FRFE vs RFE-log | 0.866 |
| 3-SRFE vs RFE-log-3 | 0.007 |
| 5-SRFE vs RFE-log-5 | 0.045 |
| 10-SRFE vs RFE-log-10 | 0.008 |

### SVM

**Friedman rank sum test:** Friedman chi-squared = 37.86, df = 7, p-value = 3.221e-06

Mean ranks:

| 3-SRFE | 10-SRFE | 5-SRFE | RFE-log-5 | FRFE | RFE-log-3 | RFE-log-10 | RFE-log |
|---:|---:|---:|---:|---:|---:|---:|---:|
| 3.2 | 3.21 | 4 | 4.41 | 5.04 | 5.11 | 5.18 | 5.86 |

**Wilcoxon signed rank test:**

| Hypothesis | p-value |
|---|---:|
| FRFE vs RFE-log | 0.023 |
| 3-SRFE vs RFE-log-3 | 0.010 |
| 5-SRFE vs RFE-log-5 | 0.035 |
| 10-SRFE vs RFE-log-10 | 0.000 |

## Standard deviation of number of selected features

### GBM

**Friedman rank sum test:** Friedman chi-squared = 9.0074, df = 7, p-value = 0.2521

Mean ranks:

| FRFE | RFE-log | 3-SRFE | 5-SRFE | 10-SRFE | RFE-log-3 | RFE-log-5 | RFE-log-10 |
|---:|---:|---:|---:|---:|---:|---:|---:|
| 3.07 | 3.88 | 3.88 | 4.38 | 4.75 | 4.79 | 5.61 | 5.66 |

**Wilcoxon signed rank test:**

| Hypothesis | p-value |
|---|---:|
| FRFE vs RFE-log | 0.212 |
| 3-SRFE vs RFE-log-3 | 0.041 |
| 5-SRFE vs RFE-log-5 | 0.041 |
| 10-SRFE vs RFE-log-10 | 0.045 |

### LR

**Friedman rank sum test:** Friedman chi-squared = 30.955, df = 7, p-value = 6.337e-05

Mean ranks:

| 10-SRFE | 5-SRFE | 3-SRFE | FRFE | RFE-log-5 | RFE-log | RFE-log-3 | RFE-log-10 |
|---:|---:|---:|---:|---:|---:|---:|---:|
| 3.32 | 3.61 | 3.64 | 3.93 | 5.14 | 5.16 | 5.43 | 5.77 |

**Wilcoxon signed rank test:**

| Hypothesis | p-value |
|---|---:|
| FRFE vs RFE-log | 0.022 |
| 3-SRFE vs RFE-log-3 | 0.005 |
| 5-SRFE vs RFE-log-5 | 0.009 |
| 10-SRFE vs RFE-log-10 | 0.000 |

### RF

**Friedman rank sum test:** Friedman chi-squared = 24.119, df = 7, p-value = 0.001086

Mean ranks:

| FRFE | RFE-log-10 | RFE-log | RFE-log-5 | RFE-log-3 | 5-SRFE | 10-SRFE | 3-SRFE |
|---:|---:|---:|---:|---:|---:|---:|---:|
| 3.46 | 3.88 | 4.09 | 4.11 | 4.29 | 4.64 | 5.61 | 5.93 |

**Wilcoxon signed rank test:**

| Hypothesis | p-value |
|---|---:|
| FRFE vs RFE-log | 0.140 |
| 3-SRFE vs RFE-log-3 | 0.929 |
| 5-SRFE vs RFE-log-5 | 0.786 |
| 10-SRFE vs RFE-log-10 | 0.997 |

### SVM

**Friedman rank sum test:** Friedman chi-squared = 34.667, df = 7, p-value = 1.292e-05

Mean ranks:

| FRFE | 10-SRFE | 3-SRFE | 5-SRFE | RFE-log-10 | RFE-log | RFE-log-5 | RFE-log-3 |
|---:|---:|---:|---:|---:|---:|---:|---:|
| 3.18 | 3.43 | 3.75 | 4 | 5.07 | 5.39 | 5.57 | 5.61 |

**Wilcoxon signed rank test:**

| Hypothesis | p-value |
|---|---:|
| FRFE vs RFE-log | 0.000 |
| 3-SRFE vs RFE-log-3 | 0.002 |
| 5-SRFE vs RFE-log-5 | 0.004 |
| 10-SRFE vs RFE-log-10 | 0.000 |

## Accuracy

*R could not compute exact Wilcoxon p-values for this metric (ties and/or zero differences); the reported values use the normal approximation.*

### GBM

**Friedman rank sum test:** Friedman chi-squared = 7.8272, df = 7, p-value = 0.3481

Mean ranks:

| RFE-log-3 | RFE-log-5 | RFE-log | RFE-log-10 | FRFE | 3-SRFE | 10-SRFE | 5-SRFE |
|---:|---:|---:|---:|---:|---:|---:|---:|
| 3.77 | 4.23 | 4.36 | 4.39 | 4.64 | 4.79 | 4.79 | 5.04 |

**Wilcoxon signed rank test:**

| Hypothesis | p-value |
|---|---:|
| FRFE vs RFE-log | 0.778 |
| 3-SRFE vs RFE-log-3 | 0.997 |
| 5-SRFE vs RFE-log-5 | 0.886 |
| 10-SRFE vs RFE-log-10 | 0.481 |

### LR

**Friedman rank sum test:** Friedman chi-squared = 3.2789, df = 7, p-value = 0.8581

Mean ranks:

| RFE-log-10 | 3-SRFE | FRFE | 5-SRFE | RFE-log-5 | 10-SRFE | RFE-log | RFE-log-3 |
|---:|---:|---:|---:|---:|---:|---:|---:|
| 4.11 | 4.27 | 4.3 | 4.32 | 4.55 | 4.79 | 4.82 | 4.84 |

**Wilcoxon signed rank test:**

| Hypothesis | p-value |
|---|---:|
| FRFE vs RFE-log | 0.434 |
| 3-SRFE vs RFE-log-3 | 0.623 |
| 5-SRFE vs RFE-log-5 | 0.530 |
| 10-SRFE vs RFE-log-10 | 0.841 |

### RF

**Friedman rank sum test:** Friedman chi-squared = 4.9125, df = 7, p-value = 0.6706

Mean ranks:

| 3-SRFE | 5-SRFE | RFE-log-10 | RFE-log-3 | FRFE | RFE-log | 10-SRFE | RFE-log-5 |
|---:|---:|---:|---:|---:|---:|---:|---:|
| 4.04 | 4.05 | 4.21 | 4.41 | 4.66 | 4.75 | 4.91 | 4.96 |

**Wilcoxon signed rank test:**

| Hypothesis | p-value |
|---|---:|
| FRFE vs RFE-log | 0.457 |
| 3-SRFE vs RFE-log-3 | 0.415 |
| 5-SRFE vs RFE-log-5 | 0.092 |
| 10-SRFE vs RFE-log-10 | 0.928 |

### SVM

**Friedman rank sum test:** Friedman chi-squared = 15.172, df = 7, p-value = 0.03385

Mean ranks:

| RFE-log | RFE-log-5 | RFE-log-10 | RFE-log-3 | 3-SRFE | FRFE | 10-SRFE | 5-SRFE |
|---:|---:|---:|---:|---:|---:|---:|---:|
| 3.82 | 4 | 4.02 | 4.34 | 4.45 | 4.64 | 5.14 | 5.59 |

**Wilcoxon signed rank test:**

| Hypothesis | p-value |
|---|---:|
| FRFE vs RFE-log | 0.928 |
| 3-SRFE vs RFE-log-3 | 0.847 |
| 5-SRFE vs RFE-log-5 | 0.995 |
| 10-SRFE vs RFE-log-10 | 0.839 |

## Kappa

*R could not compute exact Wilcoxon p-values for this metric (ties and/or zero differences); the reported values use the normal approximation.*

### GBM

**Friedman rank sum test:** Friedman chi-squared = 8.0687, df = 7, p-value = 0.3266

Mean ranks:

| RFE-log-3 | RFE-log-5 | RFE-log-10 | RFE-log | FRFE | 10-SRFE | 3-SRFE | 5-SRFE |
|---:|---:|---:|---:|---:|---:|---:|---:|
| 3.77 | 4.27 | 4.32 | 4.34 | 4.61 | 4.8 | 4.89 | 5 |

**Wilcoxon signed rank test:**

| Hypothesis | p-value |
|---|---:|
| FRFE vs RFE-log | 0.823 |
| 3-SRFE vs RFE-log-3 | 0.997 |
| 5-SRFE vs RFE-log-5 | 0.911 |
| 10-SRFE vs RFE-log-10 | 0.444 |

### LR

**Friedman rank sum test:** Friedman chi-squared = 3.1857, df = 7, p-value = 0.8673

Mean ranks:

| RFE-log-10 | 3-SRFE | 5-SRFE | FRFE | RFE-log-5 | RFE-log | RFE-log-3 | 10-SRFE |
|---:|---:|---:|---:|---:|---:|---:|---:|
| 4.11 | 4.29 | 4.3 | 4.38 | 4.46 | 4.82 | 4.82 | 4.82 |

**Wilcoxon signed rank test:**

| Hypothesis | p-value |
|---|---:|
| FRFE vs RFE-log | 0.536 |
| 3-SRFE vs RFE-log-3 | 0.618 |
| 5-SRFE vs RFE-log-5 | 0.518 |
| 10-SRFE vs RFE-log-10 | 0.823 |

### RF

**Friedman rank sum test:** Friedman chi-squared = 5.2849, df = 7, p-value = 0.6252

Mean ranks:

| 5-SRFE | RFE-log-10 | 3-SRFE | RFE-log-3 | FRFE | RFE-log | 10-SRFE | RFE-log-5 |
|---:|---:|---:|---:|---:|---:|---:|---:|
| 4.07 | 4.11 | 4.12 | 4.34 | 4.64 | 4.73 | 4.98 | 5 |

**Wilcoxon signed rank test:**

| Hypothesis | p-value |
|---|---:|
| FRFE vs RFE-log | 0.520 |
| 3-SRFE vs RFE-log-3 | 0.424 |
| 5-SRFE vs RFE-log-5 | 0.136 |
| 10-SRFE vs RFE-log-10 | 0.913 |

### SVM

**Friedman rank sum test:** Friedman chi-squared = 13.647, df = 7, p-value = 0.05784

Mean ranks:

| RFE-log | RFE-log-10 | RFE-log-5 | RFE-log-3 | 3-SRFE | FRFE | 10-SRFE | 5-SRFE |
|---:|---:|---:|---:|---:|---:|---:|---:|
| 3.86 | 3.95 | 4 | 4.43 | 4.52 | 4.66 | 5.07 | 5.52 |

**Wilcoxon signed rank test:**

| Hypothesis | p-value |
|---|---:|
| FRFE vs RFE-log | 0.938 |
| 3-SRFE vs RFE-log-3 | 0.835 |
| 5-SRFE vs RFE-log-5 | 0.994 |
| 10-SRFE vs RFE-log-10 | 0.855 |

## Macro recall

*R could not compute exact Wilcoxon p-values for this metric (ties and/or zero differences); the reported values use the normal approximation.*

### GBM

**Friedman rank sum test:** Friedman chi-squared = 6.6426, df = 7, p-value = 0.467

Mean ranks:

| RFE-log-3 | RFE-log-5 | RFE-log | RFE-log-10 | FRFE | 10-SRFE | 3-SRFE | 5-SRFE |
|---:|---:|---:|---:|---:|---:|---:|---:|
| 3.84 | 4.27 | 4.43 | 4.43 | 4.54 | 4.57 | 4.89 | 5.04 |

**Wilcoxon signed rank test:**

| Hypothesis | p-value |
|---|---:|
| FRFE vs RFE-log | 0.649 |
| 3-SRFE vs RFE-log-3 | 0.998 |
| 5-SRFE vs RFE-log-5 | 0.924 |
| 10-SRFE vs RFE-log-10 | 0.594 |

### LR

**Friedman rank sum test:** Friedman chi-squared = 4.0346, df = 7, p-value = 0.7758

Mean ranks:

| 3-SRFE | RFE-log-10 | FRFE | RFE-log-5 | 5-SRFE | RFE-log-3 | 10-SRFE | RFE-log |
|---:|---:|---:|---:|---:|---:|---:|---:|
| 4.18 | 4.18 | 4.29 | 4.34 | 4.39 | 4.86 | 4.88 | 4.89 |

**Wilcoxon signed rank test:**

| Hypothesis | p-value |
|---|---:|
| FRFE vs RFE-log | 0.422 |
| 3-SRFE vs RFE-log-3 | 0.540 |
| 5-SRFE vs RFE-log-5 | 0.552 |
| 10-SRFE vs RFE-log-10 | 0.726 |

### RF

**Friedman rank sum test:** Friedman chi-squared = 6.6782, df = 7, p-value = 0.4631

Mean ranks:

| 5-SRFE | 3-SRFE | RFE-log-10 | RFE-log-3 | FRFE | RFE-log | 10-SRFE | RFE-log-5 |
|---:|---:|---:|---:|---:|---:|---:|---:|
| 3.86 | 4.11 | 4.12 | 4.41 | 4.68 | 4.88 | 4.96 | 4.98 |

**Wilcoxon signed rank test:**

| Hypothesis | p-value |
|---|---:|
| FRFE vs RFE-log | 0.390 |
| 3-SRFE vs RFE-log-3 | 0.385 |
| 5-SRFE vs RFE-log-5 | 0.094 |
| 10-SRFE vs RFE-log-10 | 0.960 |

### SVM

**Friedman rank sum test:** Friedman chi-squared = 14.459, df = 7, p-value = 0.0436

Mean ranks:

| RFE-log | RFE-log-10 | RFE-log-5 | RFE-log-3 | 3-SRFE | FRFE | 10-SRFE | 5-SRFE |
|---:|---:|---:|---:|---:|---:|---:|---:|
| 3.82 | 3.98 | 4.12 | 4.27 | 4.43 | 4.71 | 5.11 | 5.55 |

**Wilcoxon signed rank test:**

| Hypothesis | p-value |
|---|---:|
| FRFE vs RFE-log | 0.930 |
| 3-SRFE vs RFE-log-3 | 0.859 |
| 5-SRFE vs RFE-log-5 | 0.994 |
| 10-SRFE vs RFE-log-10 | 0.812 |

## G-mean

*R could not compute exact Wilcoxon p-values for this metric (ties and/or zero differences); the reported values use the normal approximation.*

### GBM

**Friedman rank sum test:** Friedman chi-squared = 6.9734, df = 7, p-value = 0.4317

Mean ranks:

| RFE-log-3 | RFE-log-10 | RFE-log-5 | FRFE | RFE-log | 10-SRFE | 5-SRFE | 3-SRFE |
|---:|---:|---:|---:|---:|---:|---:|---:|
| 3.77 | 4.32 | 4.41 | 4.48 | 4.52 | 4.61 | 4.86 | 5.04 |

**Wilcoxon signed rank test:**

| Hypothesis | p-value |
|---|---:|
| FRFE vs RFE-log | 0.596 |
| 3-SRFE vs RFE-log-3 | 0.998 |
| 5-SRFE vs RFE-log-5 | 0.807 |
| 10-SRFE vs RFE-log-10 | 0.831 |

### LR

**Friedman rank sum test:** Friedman chi-squared = 4.5779, df = 7, p-value = 0.7113

Mean ranks:

| RFE-log-10 | FRFE | RFE-log-5 | 5-SRFE | 3-SRFE | RFE-log-3 | RFE-log | 10-SRFE |
|---:|---:|---:|---:|---:|---:|---:|---:|
| 4.12 | 4.27 | 4.27 | 4.29 | 4.34 | 4.84 | 4.89 | 4.98 |

**Wilcoxon signed rank test:**

| Hypothesis | p-value |
|---|---:|
| FRFE vs RFE-log | 0.177 |
| 3-SRFE vs RFE-log-3 | 0.554 |
| 5-SRFE vs RFE-log-5 | 0.470 |
| 10-SRFE vs RFE-log-10 | 0.917 |

### RF

**Friedman rank sum test:** Friedman chi-squared = 9.725, df = 7, p-value = 0.2047

Mean ranks:

| 5-SRFE | RFE-log-10 | 3-SRFE | FRFE | RFE-log-3 | RFE-log | 10-SRFE | RFE-log-5 |
|---:|---:|---:|---:|---:|---:|---:|---:|
| 3.71 | 4.09 | 4.21 | 4.36 | 4.59 | 4.61 | 5.07 | 5.36 |

**Wilcoxon signed rank test:**

| Hypothesis | p-value |
|---|---:|
| FRFE vs RFE-log | 0.247 |
| 3-SRFE vs RFE-log-3 | 0.251 |
| 5-SRFE vs RFE-log-5 | 0.013 |
| 10-SRFE vs RFE-log-10 | 0.987 |

### SVM

**Friedman rank sum test:** Friedman chi-squared = 12.58, df = 7, p-value = 0.08302

Mean ranks:

| RFE-log | RFE-log-10 | RFE-log-5 | 3-SRFE | RFE-log-3 | FRFE | 10-SRFE | 5-SRFE |
|---:|---:|---:|---:|---:|---:|---:|---:|
| 3.79 | 4.12 | 4.25 | 4.27 | 4.34 | 4.57 | 5.21 | 5.45 |

**Wilcoxon signed rank test:**

| Hypothesis | p-value |
|---|---:|
| FRFE vs RFE-log | 0.876 |
| 3-SRFE vs RFE-log-3 | 0.610 |
| 5-SRFE vs RFE-log-5 | 0.962 |
| 10-SRFE vs RFE-log-10 | 0.762 |

## Processing time

*R could not compute exact Wilcoxon p-values for this metric (zero differences); the reported values use the normal approximation.*

### GBM

**Friedman rank sum test:** Friedman chi-squared = 92.266, df = 7, p-value < 2.2e-16

Mean ranks:

| FRFE | RFE-log | 5-SRFE | 3-SRFE | RFE-log-5 | 10-SRFE | RFE-log-3 | RFE-log-10 |
|---:|---:|---:|---:|---:|---:|---:|---:|
| 2.18 | 2.5 | 4.43 | 4.71 | 4.73 | 5.09 | 5.96 | 6.39 |

**Wilcoxon signed rank test:**

| Hypothesis | p-value |
|---|---:|
| FRFE vs RFE-log | 0.500 |
| 3-SRFE vs RFE-log-3 | 0.019 |
| 5-SRFE vs RFE-log-5 | 0.040 |
| 10-SRFE vs RFE-log-10 | 0.008 |

### LR

**Friedman rank sum test:** Friedman chi-squared = 90.481, df = 7, p-value < 2.2e-16

Mean ranks:

| FRFE | RFE-log | 5-SRFE | RFE-log-5 | 10-SRFE | 3-SRFE | RFE-log-3 | RFE-log-10 |
|---:|---:|---:|---:|---:|---:|---:|---:|
| 1.91 | 2.41 | 4.02 | 4.91 | 5.09 | 5.23 | 5.73 | 6.7 |

**Wilcoxon signed rank test:**

| Hypothesis | p-value |
|---|---:|
| FRFE vs RFE-log | 0.524 |
| 3-SRFE vs RFE-log-3 | 0.346 |
| 5-SRFE vs RFE-log-5 | 0.232 |
| 10-SRFE vs RFE-log-10 | 0.113 |

### RF

**Friedman rank sum test:** Friedman chi-squared = 152.58, df = 7, p-value < 2.2e-16

Mean ranks:

| RFE-log | FRFE | RFE-log-5 | RFE-log-3 | 5-SRFE | RFE-log-10 | 10-SRFE | 3-SRFE |
|---:|---:|---:|---:|---:|---:|---:|---:|
| 1.52 | 1.84 | 3.45 | 4.66 | 4.91 | 5.55 | 6.66 | 7.41 |

**Wilcoxon signed rank test:**

| Hypothesis | p-value |
|---|---:|
| FRFE vs RFE-log | 0.978 |
| 3-SRFE vs RFE-log-3 | 1.000 |
| 5-SRFE vs RFE-log-5 | 1.000 |
| 10-SRFE vs RFE-log-10 | 0.981 |

### SVM

**Friedman rank sum test:** Friedman chi-squared = 92.818, df = 7, p-value < 2.2e-16

Mean ranks:

| FRFE | RFE-log | 5-SRFE | RFE-log-5 | 3-SRFE | 10-SRFE | RFE-log-3 | RFE-log-10 |
|---:|---:|---:|---:|---:|---:|---:|---:|
| 1.88 | 2.2 | 4.39 | 4.86 | 5.21 | 5.25 | 5.71 | 6.5 |

**Wilcoxon signed rank test:**

| Hypothesis | p-value |
|---|---:|
| FRFE vs RFE-log | 0.627 |
| 3-SRFE vs RFE-log-3 | 0.530 |
| 5-SRFE vs RFE-log-5 | 0.352 |
| 10-SRFE vs RFE-log-10 | 0.223 |

## Friedman tests summary

```{r friedmanSummary}
prettyTable(friedman_df, 3:ncol(friedman_df), 3)
```

# Feature selection plots

```{r feature plots}
# read.csv() decompresses .gz files transparently, so no explicit gzfile()
# connection is needed.
df_grid <- read.csv("GridScores.csv.gz", na.strings = c("?", "", "-")) %>% 
  mutate(Dataset = as.character(Dataset), Classifier = as.character(Classifier))

# Drop the 4-character file extension from dataset names.
df_grid$Dataset <- substr(df_grid$Dataset, 1, nchar(df_grid$Dataset) - 4)
df_grid$Dataset <- as.factor(df_grid$Dataset)

df_grid$Classifier[startsWith(df_grid$Classifier, "SVC")] <- "SVM"
df_grid$Classifier[startsWith(df_grid$Classifier, "Random")] <- "RF"
df_grid$Classifier[startsWith(df_grid$Classifier, "Logistic")] <- "LR"
df_grid$Classifier[startsWith(df_grid$Classifier, "LGBM")] <- "GBM"
df_grid$Classifier <- as.factor(df_grid$Classifier)

# Reorder levels: FRFE, 3-SRFE, 5-SRFE, 10-SRFE, then the RFE-log variants.
df_grid$Feature.selector <- factor(df_grid$Feature.selector, levels(df_grid$Feature.selector)[c(4,2,3,1,5,7,8,6)])

df_grid$Type <- as.character(df_grid$Feature.selector)
df_grid$Type[df_grid$Type %in% c("FRFE", "3-SRFE", "5-SRFE", "10-SRFE")] <- "FRFE/k-SRFE"
df_grid$Type[df_grid$Type != "FRFE/k-SRFE"] <- "RFE"
df_grid$Type <- as.factor(df_grid$Type)

cbPalette <- c("#a6cee3", "#b2df8a", "#fb9a99", "#fdbf6f", "#1f78b4", "#33a02c", "#e31a1c", "#ff7f00")

for (dataset in unique(df_grid$Dataset)){
  plot_df <- df_grid %>% filter(Dataset == dataset, Fold == 0)
  p <- ggplot(plot_df, aes(Feature.num, Accuracy, color = Feature.selector)) +
    geom_point(alpha = 0.6) +
    facet_grid(Classifier ~ Type) +
    theme_bw() +
    scale_color_manual(values = cbPalette, name = "Selector") +
    xlab("Number of features")
  ggsave(paste0("images/", dataset, ".svg"), plot = p, dpi = 300, width = 8, height = 4, units = "in")
  ggsave(paste0("images/", dataset, ".png"), plot = p, dpi = 300, width = 8, height = 4, units = "in")
  print(p + ggtitle(dataset))
}
```
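
A note on the loading step above: the second argument of `gzfile()` is the connection's *open mode*, so a call like `gzfile("GridScores.csv.gz", "GridScores.csv")` requests a nonsensical mode and produces "seek on a gzfile connection returned an internal error" warnings plus unclosed-connection warnings. Because `read.csv()` handles gzip compression transparently, the `.gz` path can be passed directly; a non-evaluated equivalence check:

```{r gz loading, eval=FALSE}
# Two equivalent ways to read the compressed grid scores.
a <- read.csv("GridScores.csv.gz", na.strings = c("?", "", "-"))           # transparent decompression
b <- read.csv(gzfile("GridScores.csv.gz"), na.strings = c("?", "", "-"))   # explicit connection
identical(a, b)
```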


# Case study

## Raw results

```{r case_study}
case_df <- read.csv("CaseStudy.csv", na.strings = c("?", "", "-")) %>%
    mutate(Dataset = as.character(Dataset), Classifier = as.character(Classifier), Selected.num = as.character(Selected.num)) %>%
    select(-c(Start.date, Selector.params, Scorer, Grid.scores, Selected.features))

# Strip the file-name suffix from dataset names.
case_df$Dataset <- substr(case_df$Dataset, 1, nchar(case_df$Dataset) - 9)
case_df$Dataset <- as.factor(case_df$Dataset)

case_df$Classifier[startsWith(case_df$Classifier, "SVC")] <- "SVM"
case_df$Classifier[startsWith(case_df$Classifier, "Random")] <- "RF"
case_df$Classifier[startsWith(case_df$Classifier, "Logistic")] <- "LR"
case_df$Classifier[startsWith(case_df$Classifier, "LGBM")] <- "GBM"
case_df$Classifier <- as.factor(case_df$Classifier)

# Failed runs are marked "error"; treat them as missing.
case_df$Selected.num[case_df$Selected.num == "error"] <- NA
case_df$Selected.num <- as.numeric(case_df$Selected.num)

prettyTable(case_df, c(9, 11:15))
```

## Mean cross-validation scores

```{r case cv summary, warning=FALSE}
case_cv_df <- case_df %>%
  select(-c(Attributes, Number.of.classes, Min.class.examples, Max.class.examples)) %>%
  group_by(Dataset, Classifier, Feature.selector) %>%
  summarise_all(mean, na.rm = TRUE)

prettyTable(case_cv_df, c(5, 8:12))
```

## Common features

```{r case common features, warning=FALSE}
scores_df <- read.csv("KappaScores.csv", na.strings = c("?", "", "-")) %>%
    mutate(Dataset = as.character(Dataset), Classifier = as.character(Classifier))

scores_df$Dataset <- substr(scores_df$Dataset, 1, nchar(scores_df$Dataset) - 9)
scores_df$Dataset <- as.factor(scores_df$Dataset)

scores_df$Classifier[startsWith(scores_df$Classifier, "SVC")] <- "SVM"
scores_df$Classifier[startsWith(scores_df$Classifier, "Random")] <- "RF"
scores_df$Classifier[startsWith(scores_df$Classifier, "Logistic")] <- "LR"
scores_df$Classifier[startsWith(scores_df$Classifier, "LGBM")] <- "GBM"
scores_df$Classifier <- as.factor(scores_df$Classifier)

prettyTable(scores_df, 5)
```